#!/usr/bin/env bash
# Launch an OpenAI-compatible vLLM HTTP server that serves the local
# Qwen3-Embedding-0.6B checkpoint as an embedding model.
#
# Every setting defaults to the value that used to be hard-coded here and can
# be overridden from the environment:
#   EMBED_MODEL_PATH     local checkpoint directory handed to `vllm serve`
#   EMBED_HOST           bind address (0.0.0.0 listens on all interfaces)
#   EMBED_PORT           TCP port of the HTTP API
#   EMBED_MAX_MODEL_LEN  value passed to --max-model-len
#   EMBED_SERVED_NAME    name clients must send in the request "model" field
# (The VLLM_ prefix is avoided on purpose: vLLM reads some VLLM_* variables
# itself.)
#
# --hf-overrides '{"is_matryoshka":true}' flags the model as supporting
# Matryoshka embeddings, which is what allows requests to pass "dimensions".
#
# NOTE(review): newer vLLM releases are reported to deprecate `--task` in
# favour of other flags -- confirm against the installed vLLM version.
vllm serve "${EMBED_MODEL_PATH:-/mnt/share/1/Qwen/Qwen3-Embedding-0.6B/}" \
    --host "${EMBED_HOST:-0.0.0.0}" \
    --port "${EMBED_PORT:-8000}" \
    --max-model-len "${EMBED_MAX_MODEL_LEN:-8192}" \
    --served-model-name "${EMBED_SERVED_NAME:-Qwen3-Embedding-0.6B}" \
    --task embed \
    --hf-overrides '{"is_matryoshka":true}'


# Test the embedding endpoint (run on the server host once vLLM is up).
# The "model" field must match --served-model-name, not the checkpoint path.

# curl -X POST http://127.0.0.1:8000/v1/embeddings \
#     -H "Content-Type: application/json" \
#     -d '{
#         "model": "Qwen3-Embedding-0.6B",
#         "input": "强制思考模式",
#         "dimensions": 128
#     }'